********************************************************
* This replication file makes the tables and figures for
* Jonathan K. Hanson and Rachel Sigman, "Leviathan's Latent Dimensions:
* Measuring State Capacity for Comparative Political Research"
* Journal of Politics, forthcoming 2021
********************************************************

* Note: indicators from Political Risk Services are a commercial product, so we cannot redistribute them.
* These include: bureau_qual, law_order, and corruption. The original source is the ICRG Researchers
* Dataset. It can be found at https://www.prsgroup.com.

* Note: Figure 1 is made in R (see separate code). This file makes the
* datasets for Figure 1.

* This file assumes that "HansonSigman_source.dta" and "cross-sectional-table5.dta"
* are located in a subdirectory called Data in the working directory

* the estout package is required
* ssc install estout, replace

* Load the main source data from the Data subdirectory of the working
* directory, discarding whatever is currently in memory.
*cd ""

use "Data/HansonSigman_source.dta", clear

*****************  Table 1: Indicators of State Capacity
* Writes one row per indicator to the workbook "table-1": the indicator's
* variable label, the number of distinct cntrynum values, and the first and
* last year, all computed over observations where neither the indicator nor
* Capacity equals system missing (.).  Columns B, D and F carry the LaTeX
* column and row separators.

putexcel set "table-1", replace

* Header row
putexcel A1 = ("Indicator")
putexcel B1 = (" & ")
putexcel C1 = ("Countries")
putexcel D1 = (" & ")
putexcel E1 = ("Years")
putexcel F1 = (" \\ \toprule ")

local indicators AdmEffic bureau_qual censusfreq irai_erm v2stfisccap infcap law_order milpercap milexpercap bti_mo policecap irai_qbfm irai_qpa v2clrspct StateHist50s v2terr wbstat tax_inc_tax tax_trade_tax taxrev_gdp weberian

* Data rows start directly below the header
local row = 2

foreach var of varlist `indicators' {
	* Collect everything the row needs before touching the workbook
	local rowlabel : variable label `var'

	quietly summarize year if `var' != . & Capacity != .
	local span "`r(min)'--`r(max)'"

	* r(r) from tabulate is the number of distinct cntrynum values
	quietly tabulate cntrynum if `var' != . & Capacity != .
	local ncountries `r(r)'

	* Write the row left to right
	putexcel A`row' = ("`rowlabel'")
	putexcel B`row' = (" & ")
	putexcel C`row' = `ncountries', nformat(number_d0)
	putexcel D`row' = (" & ")
	putexcel E`row' = ("`span'")
	putexcel F`row' = (" \\")

	local ++row
}

putexcel save

*****************  Table 2: Correlation of Capacity with Base Indicators
* For each of the 21 base indicators, stores the pairwise correlation with
* Capacity and the number of observations used, then writes labels,
* correlations and Ns to the workbook "table-2" with LaTeX separators.

local indicators censusfreq StateHist50s tax_inc_tax tax_trade_tax taxrev_gdp weberian wbstat v2terr v2clrspct v2stfisccap infcap policecap milexpercap milpercap bureau_qual law_order irai_erm irai_qbfm irai_qpa AdmEffic bti_mo

* One matrix row per indicator
matrix r = J(21,1,.)
matrix N = J(21,1,.)

putexcel set "table-2", replace

* Header row
putexcel A1 = ("Indicator")
putexcel B1 = (" & ")
putexcel C1 = ("r")
putexcel D1 = (" & ")
putexcel E1 = ("N")
putexcel F1 = ("\\ \toprule")

local k = 0
foreach var of varlist `indicators' {
	local ++k
	* Worksheet row is offset by one because of the header
	local xlrow = `k' + 1

	correlate Capacity `var'
	matrix r[`k',1] = r(rho)
	matrix N[`k',1] = r(N)

	local rowlabel : variable label `var'
	putexcel A`xlrow' = ("`rowlabel'")
	putexcel B`xlrow' = (" & ")
	putexcel D`xlrow' = (" & ")
	putexcel F`xlrow' = ("\\")
}

* Worksheet row of the last indicator
local lastrow = `k' + 1

putexcel C2 = matrix(r)
putexcel E2 = matrix(N)

* Replace the final row terminator with one that also closes the table
putexcel F`lastrow' = ("\\ \bottomrule")

* Show correlations with two decimals
putexcel C2:C`lastrow', overwritefmt nformat(0.00)

putexcel save

*****************  Table 3: Correlations of Capacity with Other Measures
* For each alternative measure, stores the pairwise correlation with Capacity
* and the number of observations used, then writes labels, correlations and
* Ns to the workbook "table-3" with LaTeX separators.

local measures wgi_ge wgi_rl wgi_rq bti_rol qs_impar irai_psmiave hendrix_rl iraiscore bti_st bti_mi lnMyers Public_Services ffp_fsi

* Size the result matrices from the list itself.  They were previously
* hard-coded to 15 rows although only 13 measures are listed, which pushed
* two all-missing rows to the workbook below the \bottomrule row.
local nmeasures : word count `measures'
matrix r = J(`nmeasures',1,.)
matrix N = J(`nmeasures',1,.)

putexcel set "table-3", replace

* Header row
putexcel A1=("Indicator")
putexcel B1=(" & ")
putexcel C1=("r")
putexcel D1=(" & ")
putexcel E1=("N")
putexcel F1=("\\ \toprule")

* i indexes matrix rows; j indexes worksheet rows (offset by the header)
local i = 1
local j = 2
foreach var of varlist `measures' {
	cor Capacity `var'
	matrix r[`i',1]=r(rho)
	matrix N[`i',1]=r(N)
	local lab: variable label `var'
	putexcel A`j' = ("`lab'")
	putexcel B`j' = (" & ")
	putexcel D`j' = (" & ")
	putexcel F`j' = ("\\")
	local ++i
	local ++j
}

putexcel C2 = matrix(r)
putexcel E2 = matrix(N)

* After the loop i equals the worksheet row of the last measure, so this
* replaces that row's terminator with one that also closes the table.
putexcel F`i' = ("\\ \bottomrule")

* Show correlations with two decimals
putexcel C2:C`i', overwritefmt nformat(0.00)

putexcel save

*****************  Table 4: Construct Validity Tests for Capacity
* Regresses six outcomes on Capacity and log GDP per capita, stores each set
* of estimates, and exports them side by side to "table-4.tex" with estout.
* The first three models absorb cntrynum (areg); the last three are plain OLS.

generate lnGDPcap = ln(GDPcap)

* Models that absorb cntrynum
local fe_depvars shadow lnMyers Public_Services
local fe_names   InformalEcon lnMyers PublicServ
forvalues m = 1/3 {
	local y  : word `m' of `fe_depvars'
	local nm : word `m' of `fe_names'
	areg `y' Capacity lnGDPcap, absorb(cntrynum)
	estimates store `nm'
	* Flag shown in the "Fixed Effects?" row of the table
	estadd local fixed "Yes"
}

* Models without absorbed effects
local ols_depvars returned ave_days egovernmentindex
local ols_names   Letters AveDays eGov
forvalues m = 1/3 {
	local y  : word `m' of `ols_depvars'
	local nm : word `m' of `ols_names'
	regress `y' Capacity lnGDPcap
	estimates store `nm'
	estadd local fixed "No"
}

label variable Capacity "Capacity"

* Export the table; the options below are the LaTeX scaffolding
estout InformalEcon lnMyers PublicServ Letters AveDays eGov using "table-4.tex", ///
	style(tex) ///
	cells(b(star fmt(%4.2f)) se(par fmt(%4.2f))) ///
	stats(N r2 fixed, fmt(%4.0f %4.2f %3s) labels("N" "\$R^2\$" "Fixed Effects?") layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}")) ///
	title(Construct Validity Tests for Capacity) ///
	varlabels(_cons Constant, elist(Capacity [1.5ex] lnGDPcap [1.5ex])) ///
	starl(\raisebox{.7ex}[0pt]{\tiny\$\wedge\$} 0.10 \raisebox{.1ex}[0pt]{*} 0.05 \raisebox{.1ex}[0pt]{**} 0.01) ///
	mlabels(, span prefix(\multicolumn{@span}{c}{) suffix(})) ///
	collabels(, none) ///
	prehead("\begin{table}[htp]{\bf \caption{\label{table-capacity-tests}@title}}" "{\footnotesize" "\begin{center}" "\begin{tabular}{l*{5}{D{.}{.}{5.4}D{.}{.}{5.4}D{.}{.}{5.4}}}") ///
	numbers(\multicolumn{1}{c}{( )}) ///
	posthead(\toprule) ///
	prefoot("\cmidrule{2-@span}") ///
	postfoot("\bottomrule" "\multicolumn{@span}{c}{{\footnotesize \raisebox{.7ex}[0pt]{\tiny\$\wedge\$} \$p<0.10\$, * \$p<0.05\$, ** \$p<0.01\$}}\\" "\vspace{.25cm}\\" "\end{tabular}" "\end{center}}" "\end{table}") ///
	replace

*****************  Table 5: Illustrative Tests Using Capacity
* Cross-sectional OLS of seven outcomes on starting-period Capacity, log GDP
* per capita, Democracy and TaxRev, exported to "table-5.tex" with estout.

clear
use "Data/cross-sectional-table5.dta"

* Every model shares the same right-hand side
local rhs Capacity_start lnGDPcapstart Democracy TaxRev

* Outcomes and the names their estimates are stored under (parallel lists)
local depvars Roads10 water_basic Hospitals10 Doctors10 InfMort10 LifeExp10 lnGDPcap10
local names   Roads Water Hospitals Doctors InfMort LifeExp lnGDPcap10
local nmodels : word count `depvars'

forvalues m = 1/`nmodels' {
	local y  : word `m' of `depvars'
	local nm : word `m' of `names'
	reg `y' `rhs'
	estimates store `nm'
}

* NOTE(review): the Doctors model is estimated and stored above but is not
* among the models passed to estout below, so it does not appear in the table.

*** export table
* varlabels() now keys the GDP label on lnGDPcapstart, the regressor's actual
* name; the previous key (GDPcapstart) matched no coefficient, so the
* "lnGDPcap60" label was never applied.
estout InfMort LifeExp Roads Water Hospitals lnGDPcap10 using "table-5.tex", ///
	style(tex) ///
	cells(b(star fmt(%4.2f)) se(par fmt(%4.2f))) ///
	stats(N r2, fmt(%4.0f %4.2f) labels("N" "\$R^2\$") layout("\multicolumn{1}{c}{@}" "@")) ///
	title(Illustrative Tests Using Capacity) ///
	varlabels(_cons Constant lnGDPcapstart lnGDPcap60 Capacity_start Capacity60, elist(Capacity_start [1.5ex] Democracy [1.5ex] TaxRev [1.5ex] lnGDPcapstart [1.5ex])) ///
	starl(\raisebox{.7ex}[0pt]{\tiny\$\wedge\$} 0.10 \raisebox{.1ex}[0pt]{*} 0.05 \raisebox{.1ex}[0pt]{**} 0.01) ///
	mlabels(, span prefix(\multicolumn{@span}{c}{) suffix(})) ///
	collabels(, none) ///
	prehead("\begin{table}[htp]{\bf \caption{\label{table-capacity-pubservtests}@title}}" "{\footnotesize" "\begin{center}" "\begin{tabular}{l*{5}{D{.}{.}{5.4}D{.}{.}{5.4}D{.}{.}{5.4}}}") ///
	numbers(\multicolumn{1}{c}{( )}) ///
	posthead(\toprule) ///
	prefoot("\cmidrule{2-@span}") ///
	postfoot("\bottomrule" "\multicolumn{@span}{c}{{\footnotesize \raisebox{.7ex}[0pt]{\tiny\$\wedge\$} \$p<0.10\$, * \$p<0.05\$, ** \$p<0.01\$}}\\[1.5ex]" "\multicolumn{@span}{p{6.5in}}{\small Cross-sectional OLS regression with standard errors in parentheses.  The dependent variables, measured in the year 2010 for all but Water (2012), are Roads (km of road per 100 squared km), Water (\% of population using at least basic water services), Hospitals (number of hospital beds per 1,000 people), InfMort (infant mortality rate), and LifeExp (level of life expectancy), and log GDP per capita.  The independent variables are GDP/cap\$_{60}\$ (log level of GDP per capita in 1960), Democracy (mean level during the period 1960-2010), TaxRev (mean level of tax revenue as a percentage of GDP over the period 1960-2010), and Capacity in 1960.}" "\end{tabular}" "\end{center}}" "\end{table}") ///
	replace


*****************  Make data for Figure 1, to be constructed in R
* Ranks the 2015 observations with non-missing Capacity from highest to
* lowest and saves the upper and lower parts of that ranking as two datasets
* holding country, Capacity and Capacity_sd.

foreach part in tophalf bottomhalf {
	use "Data/HansonSigman_source", clear
	keep if year == 2015
	keep country Capacity Capacity_sd
	keep if Capacity != .
	gsort -Capacity

	* After sorting, _n is the rank and _N the number of ranked countries.
	* The cut point is half of _N plus .55.
	if "`part'" == "tophalf" {
		keep if _n < .5*_N + .55
	}
	else {
		keep if _n > .5*_N + .55
	}

	saveold "Data/Capacity_2015_`part'", replace
}

*****************  Figure 2: Scatter Plot of Capacity 1975 and 2015
* Plots each country's 2015 Capacity against its 1975 Capacity, using iso3
* codes as markers, with a 45-degree reference line from (-2,-2) to (2,2).

clear
use "Data/HansonSigman_source.dta"

xtset cntrynum year

* Start with the 1975 value in the 1975 row only
gen capacity_1975 = Capacity if year == 1975
label var capacity_1975 "Capacity 1975"
label var Capacity "Capacity"

* Fill each later row of a panel from the row before it, so that the 2015
* row carries the 1975 value.  The variables are named in full here: the
* original wrote `capacity', which is not a variable in its own right and
* only resolved (to capacity_1975) through Stata's variable abbreviation,
* so the script failed under -set varabbrev off-.
replace capacity_1975 = L.capacity_1975 if capacity_1975 == .

* Five points (-2, -1, 0, 1, 2) on the 45-degree line; missing elsewhere
gen x = _n - 3 if _n <= 5
gen y = x

twoway (scatter Capacity capacity_1975 if year == 2015, ///
msymbol(none) mlabel(iso3) mlabposition(0) mlabsize(vsmall)) ///
(connected y x, sort msymbol(none) lpattern(solid)), ///
ytitle(Capacity (2015)) yscale(range(-2.5 2.5)) ///
xtitle(Capacity (1975)) xscale(range(-2.5 2.5)) legend(off)

graph export "figure-2.pdf", replace

*****************  Figure 3: Scatter Plot of lnMyers and Capacity
* Uses the data already in memory.  Plots lnMyers against Capacity with iso3
* codes as markers plus a linear fit, restricted to Capacity above -2.

twoway ///
	(scatter lnMyers Capacity, msymbol(none) mlabel(iso3) mlabposition(0) mlabsize(vsmall)) ///
	(lfit lnMyers Capacity) ///
	if Capacity > -2, ///
	xlabel(-2(1)3) xtitle(Capacity) ytitle(lnMyers) legend(off)

graph export "figure-3.pdf", replace


